(C) 2019 by Damir Cavar
Based on the NLTK HOWTO Dependency.
We import the `DependencyGrammar` class from the `nltk.grammar` module:
In [1]:
from nltk.grammar import DependencyGrammar
From `nltk.parse` we import the `DependencyGraph` data structure together with two dependency parsers, one projective and one non-projective:
In [2]:
from nltk.parse import (
DependencyGraph,
ProjectiveDependencyParser,
NonprojectiveDependencyParser,
)
In [3]:
# Sample sentence, one token per line, with four whitespace-separated fields:
# token, POS tag, head index, dependency relation.
# Head indices appear to be 1-based here (the row with head 0 carries the
# ROOT relation); contrast with the zero-based zpar sample further down.
treebank_data = """Pierre NNP 2 NMOD
Vinken NNP 8 SUB
, , 2 P
61 CD 5 NMOD
years NNS 6 AMOD
old JJ 2 NMOD
, , 2 P
will MD 0 ROOT
join VB 8 VC
the DT 11 NMOD
board NN 9 OBJ
as IN 9 VMOD
a DT 15 NMOD
nonexecutive JJ 15 NMOD
director NN 12 PMOD
Nov. NNP 9 VMOD
29 CD 16 NMOD
. . 9 VMOD
"""
In [4]:
# Build a DependencyGraph from the sample text held in treebank_data.
dg = DependencyGraph(treebank_data)
In [5]:
# Convert the graph to its tree form and pretty-print it.
dg.tree().pprint()
In [6]:
# Print every (head, relation, dependent) triple of the graph.
# The first two items of each head/dependent entry are interpolated by the
# template (presumably word and tag -- confirm against the NLTK docs).
triple_template = '({h[0]}, {h[1]}), {r}, ({d[0]}, {d[1]})'
for head, rel, dep in dg.triples():
    print(triple_template.format(h=head, r=rel, d=dep))
In [7]:
from nltk.corpus import dependency_treebank
In [8]:
# Load the first parsed sentence of NLTK's dependency treebank sample.
# The corpus data is not bundled with the nltk package: on a fresh
# environment the first access raises LookupError. Fetch the data once and
# retry so the notebook survives Restart Kernel -> Run All.
try:
    t = dependency_treebank.parsed_sents()[0]
except LookupError:
    import nltk

    nltk.download('dependency_treebank')
    t = dependency_treebank.parsed_sents()[0]
In [9]:
# Render the treebank sentence in CoNLL format; the argument 3 selects the
# 3-column output style.
print(t.to_conll(3))
"Using the output of zpar (like Malt-TAB but with zero-based indexing)":
In [10]:
# The same sentence in zpar-style output: the four fields are again token,
# POS tag, head index and relation, but head indices start at 0 and the
# ROOT row uses head -1. The text begins with an empty line because the
# literal opens with a newline.
zpar_data = """
Pierre NNP 1 NMOD
Vinken NNP 7 SUB
, , 1 P
61 CD 4 NMOD
years NNS 5 AMOD
old JJ 1 NMOD
, , 1 P
will MD -1 ROOT
join VB 7 VC
the DT 10 NMOD
board NN 8 OBJ
as IN 8 VMOD
a DT 14 NMOD
nonexecutive JJ 14 NMOD
director NN 11 PMOD
Nov. NNP 8 VMOD
29 CD 15 NMOD
. . 7 P
"""
In [11]:
# zero_based=True tells DependencyGraph that the head indices in zpar_data
# count from 0 (its ROOT row uses head -1).
zdg = DependencyGraph(zpar_data, zero_based=True)
In [12]:
# Show the tree form of the graph built from the zero-based data.
print(zdg.tree())
In [13]:
# Hand-written dependency grammar for the projective parsing example.
# Each production names a head word on the left of `->` and, on the right,
# the words it may govern; `|` separates alternatives (see the NLTK
# DependencyGrammar documentation for the exact production semantics).
grammar = DependencyGrammar.fromstring("""
'fell' -> 'price' | 'stock'
'price' -> 'of' 'the'
'of' -> 'stock'
'stock' -> 'the'
""")
In [14]:
# Display the grammar currently bound to `grammar`.
print(grammar)
In [15]:
# Create a projective dependency parser driven by the grammar above.
dp = ProjectiveDependencyParser(grammar)
In [16]:
# Print every projective parse of the sentence, in sorted order.
# The loop variable is deliberately not called `t`: that name already holds
# the treebank sentence loaded in an earlier cell, and overwriting it with a
# parse tree would break that cell's `t.to_conll(3)` on an out-of-order re-run.
for tree in sorted(dp.parse(['the', 'price', 'of', 'the', 'stock', 'fell'])):
    print(tree)
In [17]:
# Second hand-written grammar, used for the non-projective parsing example.
# NOTE: this rebinds `grammar`, replacing the grammar defined for the
# projective example; cells that use `grammar` see whichever assignment
# ran most recently.
grammar = DependencyGrammar.fromstring("""
'taught' -> 'play' | 'man'
'man' -> 'the'
'play' -> 'golf' | 'dog' | 'to'
'dog' -> 'his'
""")
In [18]:
# Display the grammar currently bound to `grammar`.
print(grammar)
In [19]:
# Create a non-projective parser from the current grammar.
# NOTE: this rebinds `dp`, replacing the ProjectiveDependencyParser created
# in an earlier cell.
dp = NonprojectiveDependencyParser(grammar)
In [20]:
# Unpack the single expected parse into `g`; the one-element unpacking
# raises ValueError if the parser yields zero or more than one result.
(g,) = dp.parse(
    ['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']
)
In [21]:
# Print the word stored on the root node of the parse.
print(g.root['word'])
In [22]:
# Print the parse result `g` itself (its string representation).
print(g)
In [23]:
# Print every parse the non-projective parser produces for the sentence.
# The parser result is iterated directly rather than stored first, so no
# already-consumed iterator is left behind in the kernel namespace, and the
# loop variable says what it holds.
for graph in dp.parse(['the', 'man', 'taught', 'his', 'dog', 'to', 'play', 'golf']):
    print(graph)
In [24]:
# List each word-bearing node of the parse as "<address> <word>: <deps>",
# in ascending address order. Nodes whose 'word' is None are skipped.
# The addresses are iterated directly instead of unpacking into `_`:
# assigning `_` in an IPython kernel stops IPython from updating its
# last-output variable for the rest of the session.
for address in sorted(g.nodes):
    node = g.nodes[address]
    if node['word'] is None:
        continue
    # The dependents are read from the '' (empty-label) key of 'deps'.
    print('{address} {word}: {d}'.format(d=node['deps'][''], **node))
In [25]:
# Print the parse converted to its tree form.
print(g.tree())
In [ ]: